import re
import requests
import csv

# NOTE: only the first page of the ranking is requested. An earlier, now
# disabled, draft iterated `for start in range(0, 226, 25)` and sent
# {"start": start} as the query parameters to request the other pages.

# Target URL of the request.
url = "https://movie.douban.com/top250"

# Request headers: identify as a desktop Chrome browser.
# NOTE(review): presumably the site rejects the default client user-agent --
# confirm before removing.
head = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}

# Send the request. Without an explicit timeout, requests waits forever on a
# stalled connection; 10 seconds bounds both the connect and the read phase.
resp = requests.get(url, headers=head, timeout=10)
print(resp)  # shows the HTTP status so a blocked request is visible
# Decoded page source that the extraction step searches.
page_content = resp.text

# Compiled pattern that extracts one movie entry per match. Each literal below
# ends at one named group: name, year, score, people (number of ratings).
# DOTALL lets "." cross line breaks, since an entry spans several HTML lines.
obj = re.compile(
    r'<span class="title">(?P<name>.*?)</span>'
    r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp'
    r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
    r'.*?<span>(?P<people>.*?)人评价</span>',
    flags=re.DOTALL,
)

# Lazily iterate over every match of the pattern in the downloaded page.
res = obj.finditer(page_content)

# Write the results to a CSV file for later data analysis.
try:
    # newline="" is what the csv module requires: the writer emits "\r\n"
    # itself, and without it Windows turns that into "\r\r\n" (a blank line
    # after every row). The with-block closes the file even if a write fails.
    with open("douban_data.csv", mode="w", encoding="utf-8", newline="") as f:
        csv_writer = csv.writer(f)

        for it in res:
            # Skip entries whose captured name contains "&".
            # NOTE(review): presumably these are alternate-title spans that
            # the page separates with "&nbsp;" -- confirm.
            if "&" in it.group("name"):
                continue
            dic = it.groupdict()  # named groups of this match as a dict
            dic["year"] = dic["year"].strip()  # drop whitespace around the year
            # One row per match, columns in the order of the named groups.
            csv_writer.writerow(dic.values())
finally:
    # Release the HTTP connection whether or not the export succeeded.
    resp.close()
print("over")

